#!/usr/bin/env bash
#
# Start `vllm serve` for the local Qwen2___5-VL-32B-Instruct checkpoint.
#
# Each setting can be overridden through the environment. When a variable is
# unset or empty the original hard-coded value is used, so running this file
# with an unmodified environment issues exactly the same command as before.
#
#   Variable              Passed as                    Default
#   --------------------  ---------------------------  -------------------------
#   SERVE_MODEL_PATH      positional model argument    /root/private_data/my/Qwen/Qwen2___5-VL-32B-Instruct
#   SERVE_TP_SIZE         --tensor-parallel-size       2
#   SERVE_DTYPE           --dtype                      float16
#   SERVE_MAX_MODEL_LEN   --max-model-len              64000
#   SERVE_GPU_MEM_UTIL    --gpu-memory-utilization     0.9
#   SERVE_MODEL_NAME      --served-model-name          Qwen2___5-VL-32B-Instruct
#
# --trust-remote-code is always passed and is not configurable here.
#
# Example:
#   SERVE_TP_SIZE=4 SERVE_MAX_MODEL_LEN=32000 bash <this file>
#
# Only POSIX parameter expansion is used (no arrays, no `set` options, no
# `exec`), so the file behaves the same whether it is executed, run via
# `sh`/`bash`, or sourced.

vllm serve "${SERVE_MODEL_PATH:-/root/private_data/my/Qwen/Qwen2___5-VL-32B-Instruct}" \
  --tensor-parallel-size "${SERVE_TP_SIZE:-2}" \
  --trust-remote-code \
  --dtype "${SERVE_DTYPE:-float16}" \
  --max-model-len "${SERVE_MAX_MODEL_LEN:-64000}" \
  --gpu-memory-utilization "${SERVE_GPU_MEM_UTIL:-0.9}" \
  --served-model-name "${SERVE_MODEL_NAME:-Qwen2___5-VL-32B-Instruct}"
